# Report-wide chunk options: echo the code and let knitr tidy it to a
# width of 80 characters.
knitr::opts_chunk$set(
  echo = TRUE,
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 80)
)
# Session options: HTML kable tables, 5 significant digits when printing,
# and a large scipen penalty so numbers print in fixed notation.
options(
  knitr.table.format = "html",
  digits = 5,
  scipen = 100
)
#install.packages("pacman")
library(pacman) #for quick load/install of packages
p_load(dplyr, readr, tidyverse,reticulate, lubridate,janitor, sqldf,googlesheets4)
p_load(skimr,splitstackshape,stringr,rqdatatable)
p_load(moments)
p_load(kableExtra)
p_load(ggplot2, plotly,echarts4r,ggpubr,forcats,scales,RColorBrewer)
p_load(ggthemes)
#Advanced EDA packages
library("Rtsne")
library("DataExplorer") #used for basic stats, qq plots and bar-plots
Attaching package: ‘DataExplorer’
The following object is masked from ‘package:rquery’:
drop_columns
library("SmartEDA") #for more descriptive statistics Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
library("dlookr")
Attaching package: ‘dlookr’
The following objects are masked from ‘package:moments’:
kurtosis, skewness
The following object is masked from ‘package:tidyr’:
extract
The following object is masked from ‘package:base’:
transform
library("ggstatsplot")You can cite this package as:
Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
Attaching package: ‘ggstatsplot’
The following object is masked from ‘package:wrapr’:
:=
library("flextable") #for nicer looking tables
Attaching package: ‘flextable’
The following objects are masked from ‘package:ggpubr’:
border, font, rotate
The following objects are masked from ‘package:plotly’:
highlight, style
The following objects are masked from ‘package:kableExtra’:
as_image, footnote
The following object is masked from ‘package:purrr’:
compose
library("summarytools")
Attaching package: ‘summarytools’
The following object is masked from ‘package:wrapr’:
view
The following object is masked from ‘package:tibble’:
view
library("skimr")
library("performance") #imputing outliers Final Dataset can be found here.
Relabeling regions for countries where regions were ‘NA’ and replacing null values in country column to be ‘US’
Additionally, creating new region column to differentiate CA and US countries, and finally filtering out study that had 0 conversions.
# Load the study list and normalise its geography columns.
#   * country:   a missing value is replaced by "US".
#   * region:    forced to "AMER" for the listed American countries/groups,
#                otherwise left as read from the file.
#   * region_v2: same as region, except CA / US / US + CA become "AMER_USCA".
# Rows whose `exposed` is 0 are dropped afterwards (filter() also drops rows
# where the comparison is NA).
Final_CLS_2022_Study_List <- read_csv("FinalDataset_2022_Update.csv") %>%
  mutate(
    # Fill the missing country first: mutate() evaluates its arguments in
    # order, so doing this after `region` would leave the rows that become
    # "US" without the "AMER" relabel.
    country = case_when(
      is.na(country) ~ "US",
      TRUE ~ country
    ),
    region = case_when(
      country %in% c(
        "CA", "US", "US + CA", "BR", "MX", "CL",
        "SPLATAM - (AR, CL, CO, MX, PE)",
        "LAC-Others (BO, CR, DO, EC, GT, HN, NI, PA, PR, PY, SV, UY)",
        "LAC-Others (BO, CR, DO, EC, GT, HN, NI, PA, PY)"
      ) ~ "AMER",
      TRUE ~ region
    ),
    #search = case_when(
    #channel == 'Search' ~ 'Search',
    #TRUE ~ 'Non-Search'),
    region_v2 = case_when(
      country %in% c("CA", "US", "US + CA") ~ "AMER_USCA",
      TRUE ~ region
    )
  ) %>%
  filter(exposed != 0)
#> Rows: 513 Columns: 22
#> ── Column specification ────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (8): quarter, region, country, pa, channel, tactic, conversion, study_name
dbl (14): study_id, year, treatment_user_count, exposed, control_user_count, scaled_control, control, scalin...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#TBD
#WRITING SHEETS WILL BE COMMENTED OUT UNLESS REFRESH IS NECESSARY
#write_sheet(Final_CLS_2021_Study_List, ss = #'https://docs.google.com/spreadsheets/d/146_bmm-FgOeOQub3o4WVtt-9Aw8pqYvkQnwsHmRdZDs/edit#gid#=587747443',
# sheet = 'Final_Clean_Dataset_v2')
# Completeness overview: one row per column with its skim type and the
# share of non-missing values.
skim(Final_CLS_2022_Study_List) %>%
  # as_tibble() replaces as.tibble(), which is deprecated since tibble 2.0.0
  as_tibble() %>%
  select(skim_type, skim_variable, complete_rate)

# Number of rows kept after the cleaning step
nrow(Final_CLS_2022_Study_List)
#> [1] 512
# Number of Search rows per PA and raw conversion name, rendered as a styled
# HTML table.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  group_by(pa, conversion) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "grouped output by 'pa'" message
  summarize(count = n(), .groups = "drop") %>%
  kbl() %>%
  kable_material(
    c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    fixed_thead = TRUE
  )
| pa | conversion | count |
|---|---|---|
| Chrome | Desktop Downloads | 94 |
| Chromebook | Chromebook Microsite Referral Clicks Q4 2015 | 13 |
| DSM | Global - Google Store - All Products - Order Complete - 390-271-2647) | 20 |
| Google Cloud | GWS MCC - Free Trial Submit. | 35 |
| Pixel | All Mobile Pseudo Conversions | 7 |
| Pixel | Google Store - Product - Pixel 6 - Order Complete - 390-271-2647) | 3 |
##Creating Grouped conversion events
# Add two columns to the study list:
#   parsed_type        - the number readr::parse_number() extracts from the
#                        conversion name (NA when there is none)
#   grouped_conversion - a coarser label for the conversion event
# case_when() uses the first rule that matches, so the PA-level rules for
# Pixel and DSM take precedence over the later name/type rules; a conversion
# that matches nothing keeps its original name.
Final_CLS_2022_Study_List <- Final_CLS_2022_Study_List %>%
  mutate(parsed_type = parse_number(conversion)) %>%
  mutate(
    grouped_conversion = case_when(
      conversion %in% c(
        "Chromebook Microsite Referral Clicks Q4 2015",
        "Type 251422729 (Chromebooks Microsite Referral Clicks (Q4 2017))"
      ) ~ "Chromebook Referrals",
      conversion %in% c(
        "Desktop Downloads",
        "Type 11541547 (Desktop Download)"
      ) ~ "Desktop Downloads",
      pa == "Pixel" ~ "Mobile Conversions",
      pa == "DSM" ~ "Non-Mobile Device Conversions",
      conversion == "Type 302982954 (Lena - P Lead)" ~ "Lena P Lead",
      conversion == "Type 288347008 (LENA - B Lead)" ~ "Lena B Lead",
      conversion == "Type 288697653 (LENA - Q Lead)" ~ "Lena Q Lead",
      parsed_type == 330755641 ~ "Microsite Conversions",
      parsed_type == 14257803 ~ "Enterprise Signups",
      parsed_type == 289680712 ~ "Google(iOs) First Open",
      parsed_type == 256522942 ~ "YouTube TV - Web - Trial Start",
      parsed_type == 452391534 ~ "Trial Signups Complete",
      TRUE ~ conversion
    )
  )
# Number of Search rows per PA and grouped conversion label, rendered as a
# styled HTML table.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  group_by(pa, grouped_conversion) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "grouped output by 'pa'" message
  summarize(count = n(), .groups = "drop") %>%
  kbl() %>%
  kable_material(
    c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    fixed_thead = TRUE
  )
| pa | grouped_conversion | count |
|---|---|---|
| Chrome | Desktop Downloads | 94 |
| Chromebook | Chromebook Referrals | 13 |
| DSM | Non-Mobile Device Conversions | 20 |
| Google Cloud | GWS MCC - Free Trial Submit. | 35 |
| Pixel | Mobile Conversions | 10 |
# Four horizontal bar charts describing the Search rows. Each pipeline is a
# separate statement; geom_col() draws the pre-computed counts and is
# equivalent to geom_bar(stat = "identity").

# Distinct (study_id, quarter) combinations per quarter
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  group_by(quarter) %>%
  summarize(Overall_Studies = n_distinct(study_id, quarter)) %>%
  #summarize(Overall_Studies = n()) %>%
  ggplot(aes(x = quarter, y = Overall_Studies)) +
  geom_col(fill = "lightblue") +
  ggtitle("Quarters in Search Studies") +
  xlab("Quarter") +
  ylab("Count") +
  coord_flip() +
  theme_minimal()

# Distinct (study_id, quarter) combinations per PA
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  group_by(pa) %>%
  summarize(Overall_Studies = n_distinct(study_id, quarter)) %>%
  ggplot(aes(x = pa, y = Overall_Studies)) +
  geom_col(fill = "lightblue") +
  ggtitle("PA's in Search Studies") +
  xlab("PA") +
  ylab("Count") +
  coord_flip() +
  theme_minimal()

# Tactic-level rows (tactic != "All") per region
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(region_v2) %>%
  #summarize(Overall_Studies = n_distinct(study_id,quarter)) %>%
  summarize(Overall_Studies = n()) %>%
  ggplot(aes(x = region_v2, y = Overall_Studies)) +
  geom_col(fill = "lightblue") +
  ggtitle("Regions in Search Studies") +
  xlab("Region") +
  ylab("Count") +
  coord_flip() +
  theme_minimal()

# Tactic-level rows (tactic != "All") per tactic
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(tactic) %>%
  #summarize(Overall_Studies = n_distinct(study_id,quarter)) %>%
  summarize(Overall_Studies = n()) %>%
  ggplot(aes(x = tactic, y = Overall_Studies)) +
  geom_col(fill = "lightblue") +
  ggtitle("Tactics in Search Studies") +
  xlab("Tactic") +
  ylab("Count") +
  coord_flip() +
  theme_minimal()
#my_xlab <- paste(levels(Final_CLS_2021_Study_List$grouped_conversion),"\n(N=",table(Final_CLS_2021_Study_List$grouped_conversion),")",sep="")
# Summary function for stat_summary(fun.data = ...): returns a one-row data
# frame with `y` set to the median of `x` (where the label is drawn) and
# `label` set to "n = <number of values in x>".
n_fun <- function(x) {
  count_label <- paste0("n = ", length(x))
  data.frame(y = median(x), label = count_label)
}
# Horizontal boxplot of cost spent per grouped conversion type (Search,
# tactic-level rows only), ordered by median cost, with an "n = ..." label
# drawn at each group's median by n_fun().
p <-
  Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  mutate(
    class = fct_reorder(
      as.factor(grouped_conversion),
      cost_spent_on_exposed_group,
      .fun = median
    )
  ) %>%
  ggplot() +
  aes(
    # `class` already carries the median ordering. Wrapping it in reorder()
    # again (default FUN = mean) re-sorted the groups by their mean.
    x = class,
    y = cost_spent_on_exposed_group,
    fill = grouped_conversion
    #absolute_lift*100/cost_spent_on_exposed_group
  ) +
  geom_boxplot() +
  stat_summary(fun.data = n_fun, geom = "text", size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  xlab("Grouped Conversion Type") +
  # scale_x_discrete(labels=my_xlab) +
  ylab("Cost Spent on Exposed Group") +
  coord_flip()
# Interactive version of the plot
ggplotly(p, tooltip = c("text"))
#my_xlab <- paste(levels(Final_CLS_2021_Study_List$grouped_conversion),"\n(N=",table(Final_CLS_2021_Study_List$grouped_conversion),")",sep="")
# Label helper for stat_summary(): one row with y = median(x) and
# label = "n = <length of x>".
n_fun <- function(x) {
  data.frame(y = median(x), label = paste0("n = ", length(x)))
}
# Horizontal boxplot of absolute lift per grouped conversion type (Search,
# tactic-level rows only), ordered by median lift.
p <-
  Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  mutate(
    class = fct_reorder(
      as.factor(grouped_conversion),
      absolute_lift,
      .fun = median
    )
  ) %>%
  ggplot() +
  aes(
    # Use the median-ordered factor directly; an extra reorder() call
    # (default FUN = mean) re-sorted the groups by their mean.
    x = class,
    y = absolute_lift,
    fill = grouped_conversion
    #absolute_lift*100/cost_spent_on_exposed_group
  ) +
  geom_boxplot() +
  stat_summary(fun.data = n_fun, geom = "text", size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  xlab("Grouped Conversion Type") +
  # scale_x_discrete(labels=my_xlab) +
  ylab("Lift") +
  coord_flip()
# Interactive version of the plot
ggplotly(p, tooltip = c("text"))
#my_xlab <- paste(levels(Final_CLS_2021_Study_List$grouped_conversion),"\n(N=",table(Final_CLS_2021_Study_List$grouped_conversion),")",sep="")
# Label helper for stat_summary(): one row with y = median(x) and
# label = "n = <length of x>".
n_fun <- function(x) {
  data.frame(y = median(x), label = paste0("n = ", length(x)))
}
# Same lift boxplot as before, without rows whose raw conversion name is
# "Desktop Downloads".
# NOTE(review): the filter is on `conversion`, not `grouped_conversion`, so
# "Type 11541547 (Desktop Download)" rows would still be kept - confirm.
p <-
  Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(conversion != "Desktop Downloads") %>%
  filter(tactic != "All") %>%
  mutate(
    class = fct_reorder(
      as.factor(grouped_conversion),
      absolute_lift,
      .fun = median
    )
  ) %>%
  ggplot() +
  aes(
    # Use the median-ordered factor directly; an extra reorder() call
    # (default FUN = mean) re-sorted the groups by their mean.
    x = class,
    y = absolute_lift,
    fill = grouped_conversion
    #absolute_lift*100/cost_spent_on_exposed_group
  ) +
  geom_boxplot() +
  stat_summary(fun.data = n_fun, geom = "text", size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  xlab("Grouped Conversion Type") +
  # scale_x_discrete(labels=my_xlab) +
  ylab("Lift") +
  coord_flip()
# Interactive version of the plot
ggplotly(p, tooltip = c("text"))
# Very Significant positive lift event outliers in Free Trial Submit and Non-Mobile Device Conversions
Negative lifts in Desktop Downloads, Non-Mobile Device Conversions and Mobile Conversions
Much larger variation in Non-Mobile Device Conversions than Mobile Conversions
Desktop Downloads has largest range in values with minimum at -3252 and maximum at 171269
Desktop Downloads had maximum values of absolute lift of all conversion events, but lowest range of cost spent with the exception of outliers
Non-Mobile Device Conversions had a median cost 3 times that of Mobile Conversions and a much larger range of cost spent, but their lift was not significantly higher and included significantly more negative lift
##Count of negative lifts by Grouped Conversion
# For each grouped conversion (Search, tactic-level rows): the number of
# rows with a negative absolute lift and the percentage of the group's rows
# they represent.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  group_by(grouped_conversion) %>%
  summarise(
    Negative_Count = sum(absolute_lift < 0),
    # Negative_Count is already a single value per group here
    Percent_Negative = 100 * (Negative_Count / n())
  )
#my_xlab <- paste(levels(Final_CLS_2021_Study_List$grouped_conversion),"\n(N=",table(Final_CLS_2021_Study_List$grouped_conversion),")",sep="")
# Label helper for stat_summary(): one row with y = median(x) and
# label = "n = <length of x>".
n_fun <- function(x) {
  data.frame(y = median(x), label = paste0("n = ", length(x)))
}
# Horizontal boxplot of cost spent per region (Search, tactic-level rows
# only), ordered by median cost.
p <-
  Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  mutate(
    class = fct_reorder(
      as.factor(region_v2),
      cost_spent_on_exposed_group,
      .fun = median
    )
  ) %>%
  ggplot() +
  aes(
    # Use the median-ordered factor directly; an extra reorder() call
    # (default FUN = mean) re-sorted the groups by their mean.
    x = class,
    y = cost_spent_on_exposed_group,
    fill = region_v2
    #absolute_lift*100/cost_spent_on_exposed_group
  ) +
  geom_boxplot() +
  stat_summary(fun.data = n_fun, geom = "text", size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  xlab("Region Type") +
  # scale_x_discrete(labels=my_xlab) +
  ylab("Cost Spent on Exposed Group") +
  coord_flip()
# Interactive version of the plot
ggplotly(p, tooltip = c("text"))
#my_xlab <- paste(levels(Final_CLS_2021_Study_List$grouped_conversion),"\n(N=",table(Final_CLS_2021_Study_List$grouped_conversion),")",sep="")
# Label helper for stat_summary(): one row with y = median(x) and
# label = "n = <length of x>".
n_fun <- function(x) {
  data.frame(y = median(x), label = paste0("n = ", length(x)))
}
# Horizontal boxplot of absolute lift per region (Search, tactic-level rows
# only), ordered by median lift.
p <-
  Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  mutate(
    class = fct_reorder(as.factor(region_v2), absolute_lift, .fun = median)
  ) %>%
  ggplot() +
  aes(
    # Use the median-ordered factor directly; an extra reorder() call
    # (default FUN = mean) re-sorted the groups by their mean.
    x = class,
    y = absolute_lift,
    fill = region_v2
    #absolute_lift*100/cost_spent_on_exposed_group
  ) +
  geom_boxplot() +
  stat_summary(fun.data = n_fun, geom = "text", size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  xlab("Region Type") +
  # scale_x_discrete(labels=my_xlab) +
  ylab("Lift") +
  coord_flip()
# Interactive version of the plot
ggplotly(p, tooltip = c("text"))
#my_xlab <- paste(levels(Final_CLS_2021_Study_List$grouped_conversion),"\n(N=",table(Final_CLS_2021_Study_List$grouped_conversion),")",sep="")
# Label helper for stat_summary(): one row with y = median(x) and
# label = "n = <length of x>".
n_fun <- function(x) {
  data.frame(y = median(x), label = paste0("n = ", length(x)))
}
# Lift-by-region boxplot again, this time without the "AO" region.
p <-
  Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  filter(region_v2 != "AO") %>%
  mutate(
    class = fct_reorder(as.factor(region_v2), absolute_lift, .fun = median)
  ) %>%
  ggplot() +
  aes(
    # Use the median-ordered factor directly; an extra reorder() call
    # (default FUN = mean) re-sorted the groups by their mean.
    x = class,
    y = absolute_lift,
    fill = region_v2
    #absolute_lift*100/cost_spent_on_exposed_group
  ) +
  geom_boxplot() +
  stat_summary(fun.data = n_fun, geom = "text", size = 2) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  xlab("Region Type") +
  # scale_x_discrete(labels=my_xlab) +
  ylab("Lift") +
  coord_flip()
# Interactive version of the plot
ggplotly(p, tooltip = c("text"))
# 2.US + CA cost median over ten times median of the rest of AMER countries
##Count of negative lifts by Region
# Negative-lift count and percentage per region (Search, tactic-level rows).
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(region_v2) %>%
  summarise(
    Negative_Count = sum(absolute_lift < 0),
    Percent_Negative = 100 * (Negative_Count / n())
  )

# Total spend and number of distinct (study_id, quarter) pairs over all rows.
# NOTE(review): there is no channel/tactic filter here, so any tactic == "All"
# rows are summed together with tactic-level rows - confirm this is intended.
Final_CLS_2022_Study_List %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    Overall_Studies = n_distinct(study_id, quarter)
  )

# Same totals for Search only.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>% #filtering where tactic != All to exclude overall spend for each study. can also use tactic == 'All' to confirm totals add up
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    Overall_Studies = n_distinct(study_id, quarter)
  )
# Study 22 had maximum spend at over 5 million

# Spend per study, taken from the tactic == "All" rows.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic == "All") %>%
  select(study_id, cost_spent_on_exposed_group)
####Significantly more was spent on Google Cloud Maximum Spend on Google Cloud and minimum on Chromebook
# Spend summaries for Search, tactic-level rows. Each pipeline is a separate
# statement.

# Per PA: total spend, distinct (study_id, quarter) pairs, mean spend per
# row, and number of rows.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(pa) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    Overall_Studies = n_distinct(study_id, quarter),
    Average_Spend = mean(cost_spent_on_exposed_group),
    Overall_instances = n()
  )

# Total spend per Significant_Spend value and its percentage of all spend.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(Significant_Spend) %>%
  summarise(Overall_spend = sum(cost_spent_on_exposed_group)) %>%
  mutate(Percent_spend = 100 * Overall_spend / (sum(Overall_spend)))

# Spend per PA and Significant_Spend value, as a percentage of each PA's
# spend. .groups = "drop_last" states the default explicitly: the result
# stays grouped by `pa`, which the within-PA percentage relies on, and the
# grouping message is no longer printed.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(pa, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop_last"
  ) %>%
  mutate(Percent_spend = 100 * Overall_spend / (sum(Overall_spend)))
# Dodged bar chart of total spend per PA, split by Significant_Spend
# (Search, tactic-level rows). Grouping is not needed after the summary, so
# it is dropped, which also silences the summarise() grouping message.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(pa, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = pa, y = Overall_spend, fill = as.factor(Significant_Spend))) +
  geom_bar(position = "dodge", stat = "identity") +
  xlab("PA") +
  ylab("Total Spend") +
  ggtitle("Significant Spend by PA") +
  coord_flip() +
  theme_minimal()
#> NA

# Same chart without Chromebook and Google Cloud.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  filter(!pa %in% c("Chromebook", "Google Cloud")) %>%
  group_by(pa, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = pa, y = Overall_spend, fill = as.factor(Significant_Spend))) +
  geom_bar(position = "dodge", stat = "identity") +
  xlab("PA") +
  ylab("Total Spend") +
  ggtitle("Spend by PA and Significant Spend Excluding Chromebook & Google Cloud") +
  coord_flip() +
  theme_minimal()
All of spend on Google Cloud and Chromebook was significant
Pixel was the only PA with majority of spend as non-significant, keep in mind it only appeared in 7 instances of the data
Google Cloud averaged highest spend and had largest overall spend
# Spend per region and Significant_Spend value, as a percentage of each
# region's spend (Search, tactic-level rows). .groups = "drop_last" keeps
# the result grouped by region_v2, which the percentage relies on.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(region_v2, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop_last"
  ) %>%
  mutate(Percent_spend = 100 * Overall_spend / (sum(Overall_spend)))

# Dodged bar chart of the same totals.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(region_v2, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = region_v2, y = Overall_spend, fill = as.factor(Significant_Spend))) +
  geom_bar(position = "dodge", stat = "identity") +
  xlab("Region") +
  ylab("Total Spend") +
  ggtitle("Spend by Region and Significant Spend") +
  coord_flip() +
  theme_minimal()
Non-Significant Studies too low to be seen on barplot #### Bar Chart of Significant Spend by Region - Excluding US + CA
# Dodged bar chart of total spend per region and Significant_Spend value,
# leaving out the AMER_USCA region (Search, tactic-level rows).
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  filter(!region_v2 %in% c("AMER_USCA")) %>%
  group_by(region_v2, Significant_Spend) %>%
  # grouping is not needed for the plot; dropping it silences the message
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = region_v2, y = Overall_spend, fill = as.factor(Significant_Spend))) +
  geom_bar(position = "dodge", stat = "identity") +
  xlab("Region") +
  ylab("Total Spend") +
  ggtitle("Spend by Region and Significant Spend Excluding CA & US") +
  coord_flip() +
  theme_minimal()
# Spend per tactic and Significant_Spend value, as a percentage of each
# tactic's spend (Search, tactic-level rows). .groups = "drop_last" keeps
# the result grouped by tactic, which the percentage relies on.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(tactic, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop_last"
  ) %>%
  mutate(Percent_spend = 100 * Overall_spend / (sum(Overall_spend)))

# Dodged bar chart of the same totals.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  group_by(tactic, Significant_Spend) %>%
  summarise(
    Overall_spend = sum(cost_spent_on_exposed_group),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = tactic, y = Overall_spend, fill = as.factor(Significant_Spend))) +
  geom_bar(position = "dodge", stat = "identity") +
  xlab("Tactic") +
  ylab("Total Spend") +
  ggtitle("Significant Spend by Tactic") +
  coord_flip() +
  theme_minimal()
All regions with the exception of CA + US were 99%+ significant; CA + US were 84% significant
Significant Spend % within SKWS was highest when compared to other tactics
###3D Scatterplots to see relationships between spend, duration and lift
#install.packages("scatterplot3d")
#install.packages("rgl")
#install.packages("rdlwidget")
#library(scatterplot3d)
#library("rgl")
# 3D views of spend, duration and lift for Search, tactic-level rows.
# The data frame used by both plots was only present as commented-out code,
# so the calls below failed on an undefined `Scatter3D_Graph`.
Scatter3D_Graph <- Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All")

# scatterplot3d and rgl are not attached anywhere in this file (their
# library() calls are commented out), so each plot is drawn through the
# package namespace and only when that package is installed.
if (requireNamespace("scatterplot3d", quietly = TRUE)) {
  # Static 3D scatterplot
  scatterplot3d::scatterplot3d(
    Scatter3D_Graph$cost_spent_on_exposed_group,
    Scatter3D_Graph$duration,
    Scatter3D_Graph$absolute_lift,
    main = "3D Scatterplot",
    angle = 55,
    xlab = "Cost Spent",
    ylab = "Duration",
    zlab = "Lift"
  )
}

if (requireNamespace("rgl", quietly = TRUE)) {
  # Interactive 3D scatterplot
  # NOTE(review): `angle` is carried over from the scatterplot3d call;
  # confirm that rgl::plot3d() accepts it.
  rgl::plot3d(
    Scatter3D_Graph$cost_spent_on_exposed_group,
    Scatter3D_Graph$duration,
    Scatter3D_Graph$absolute_lift,
    main = "3D Scatterplot",
    angle = 55,
    xlab = "Cost Spent",
    ylab = "Duration",
    zlab = "Lift"
  )
}
##Scatterplot of Total Cost Spent vs. Absolute Lift for all Search
# Cost vs. absolute lift for Search, tactic-level rows (tactic != "All").
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggplot(aes(x = cost_spent_on_exposed_group, y = absolute_lift)) +
  geom_point() +
  xlab("Cost Spent") +
  ylab("Lift") +
  ggtitle("Cost vs. Lift")

#Scatterplot of Total Cost vs. Absolute Lift, for each individual study
# (uses the tactic == "All" rows)
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic == "All") %>%
  ggplot(aes(x = cost_spent_on_exposed_group, y = absolute_lift)) +
  geom_point() +
  xlab("Cost Spent") +
  ylab("Lift") +
  ggtitle("Cost vs. Lift")

##Scatterplot of Cost Spent vs. Absolute Lift, by Region
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggplot(aes(x = cost_spent_on_exposed_group, y = absolute_lift, color = region_v2)) +
  geom_point() +
  xlab("Cost Spent") +
  ylab("Lift") +
  ggtitle("Cost vs. Lift")

##Scatterplot of Cost Spent vs. Absolute Lift, by Conversion Group
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggplot(aes(x = cost_spent_on_exposed_group, y = absolute_lift, color = grouped_conversion)) +
  geom_point() +
  xlab("Cost Spent") +
  ylab("Lift") +
  ggtitle("Cost vs. Lift")
# Majority of studies with low cost and high lift are Desktop Downloads
US + CA tends to be high cost, low lift
Desktop Downloads conversion has lowest spend with highest lift
##Scatterplot of Cost Spent vs. Absolute Lift, by Year
# Cost vs. absolute lift coloured by year (Search, tactic-level rows).
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  ggplot(
    aes(
      x = cost_spent_on_exposed_group,
      y = absolute_lift,
      color = as.factor(year)
    )
  ) +
  geom_point() +
  labs(x = "Cost Spent", y = "Lift", title = "Cost vs. Lift")
##Scatterplot of Cost Spent vs Absolute Lift, by Significant Spend
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  ggplot(
    aes(
      x = cost_spent_on_exposed_group,
      y = absolute_lift,
      color = as.factor(Significant_Spend)
    )
  ) +
  geom_point() +
  labs(x = "Cost Spent", y = "Lift", title = "Cost vs. Lift Search")
##Scatterplot of Duration vs. Cost Spent for all Search ### Not as linear as expected
# (one point per tactic == "All" row)
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic == "All") %>%
  ggplot(aes(x = duration, y = cost_spent_on_exposed_group)) +
  geom_point() +
  labs(x = "Duration", y = "Cost", title = "Duration vs. Cost")
# Study id(s) with the highest spend among the Search tactic == "All" rows.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic == "All") %>%
  filter(cost_spent_on_exposed_group == max(cost_spent_on_exposed_group)) %>%
  select(study_id)

# Study id(s) with the longest duration among the same rows.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic == "All") %>%
  filter(duration == max(duration)) %>%
  select(study_id)
#> NA
##Scatterplot of Duration vs. Lift, by Region
# Duration vs. absolute lift coloured by region (Search, tactic-level rows).
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggplot(aes(x = duration, y = absolute_lift, color = region_v2)) +
  geom_point() +
  xlab("Duration") +
  ylab("Lift") +
  ggtitle("Duration vs. Lift")

##Scatterplot of Duration vs. Lift, by PA
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggplot(aes(x = duration, y = absolute_lift, color = pa)) +
  geom_point() +
  xlab("Duration") +
  ylab("Lift") +
  ggtitle("Duration vs. Lift")
# 1.Study 22 has max cost spent, yet on the lower range of duration
2.Study 1 has max duration and very low cost
# Draw the cost vs. lift scatterplot for a single PA.
#
# Keeps the Search rows at tactic level (tactic != "All") whose `pa` equals
# `pa2`, plots cost spent against absolute lift coloured by region_v2, and
# prints the plot.
#
# Arguments:
#   pa2   value of the `pa` column to plot, e.g. "Chrome"
#   data  data frame to plot from; defaults to Final_CLS_2022_Study_List so
#         existing calls pa_plot("Chrome") keep working
# Returns the ggplot object invisibly.
pa_plot <- function(pa2, data = Final_CLS_2022_Study_List) {
  plot_scatter <- data %>%
    filter(channel == "Search") %>%
    filter(tactic != "All") %>%
    filter(pa == pa2) %>%
    ggplot(aes(x = cost_spent_on_exposed_group, y = absolute_lift, color = region_v2)) +
    geom_point() +
    xlab("Cost") +
    ylab("Lift") +
    # paste() already inserts a space; the old " Cost vs. Lift" literal
    # produced a double space in the title
    ggtitle(paste(pa2, "Cost vs. Lift"))
  # Explicit print so the plot is drawn when called inside a loop
  print(plot_scatter)
  invisible(plot_scatter)
}
# Draw the cost vs. lift scatterplot once for each listed PA.
pa_columns <- c("Chrome", "Chromebook", "DSM", "Google Cloud", "Pixel")
for (i in pa_columns) {
  pa_plot(i)
}
#> NA
Most linear trends seem to be Chromebook, DSM and Google Cloud
DSM only contains AMER regions
Chrome costs remain low with high lift, performed worst in US + CA region
For Chrome, Chromebook and Google Cloud, it generally requires higher cost for lift in US + CA regions
# Draw the cost vs. lift scatterplot for a single region.
#
# Keeps the Search rows at tactic level (tactic != "All") whose `region_v2`
# equals `region2`, plots cost spent against absolute lift coloured by PA,
# and prints the plot.
#
# Arguments:
#   region2  value of the `region_v2` column to plot, e.g. "EMEA"
#   data     data frame to plot from; defaults to Final_CLS_2022_Study_List
#            so existing calls region_plot("EMEA") keep working
# Returns the ggplot object invisibly.
region_plot <- function(region2, data = Final_CLS_2022_Study_List) {
  plot_scatter <- data %>%
    filter(channel == "Search") %>%
    filter(tactic != "All") %>%
    filter(region_v2 == region2) %>%
    ggplot(aes(x = cost_spent_on_exposed_group, y = absolute_lift, color = pa)) +
    geom_point() +
    xlab("Cost") +
    ylab("Lift") +
    # paste() already inserts a space; the old " Cost vs. Lift" literal
    # produced a double space in the title
    ggtitle(paste(region2, "Cost vs. Lift"))
  # Explicit print so the plot is drawn when called inside a loop
  print(plot_scatter)
  invisible(plot_scatter)
}
# Draw the cost vs. lift scatterplot once for each listed region.
region_cols <- c("AMER", "AMER_USCA", "AO", "APAC", "EMEA")
for (i in region_cols) {
  region_plot(i)
}
# EMEA region has highest lift with low costs
In all regions Chrome seemed to be highest performing, with low costs resulting in high lift
US + CA cost vs. lift relationship much more dispersed than rest of AMER region
Google Cloud seems to require significant cost for lift across all regions when compared to all pa’s
# Column-by-column summary() of the Chrome rows (Search, tactic-level only).
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  filter(pa == "Chrome") %>%
  summary()
#>     study_id         year        quarter            region            country              pa
Min. : 2.0 Min. :2021 Length:92 Length:92 Length:92 Length:92
1st Qu.: 2.0 1st Qu.:2021 Class :character Class :character Class :character Class :character
Median : 9.5 Median :2022 Mode :character Mode :character Mode :character Mode :character
Mean : 9.5 Mean :2022
3rd Qu.:17.0 3rd Qu.:2022
Max. :17.0 Max. :2022
channel tactic conversion study_name treatment_user_count
Length:92 Length:92 Length:92 Length:92 Min. : 654
Class :character Class :character Class :character Class :character 1st Qu.: 29666
Mode :character Mode :character Mode :character Mode :character Median : 102492
Mean : 177532
3rd Qu.: 185546
Max. :1645490
exposed control_user_count scaled_control control scaling_factor
Min. : 139 Min. : 34 Min. : 19 Min. : 1 Min. :19.2
1st Qu.: 5505 1st Qu.: 769 1st Qu.: 3817 1st Qu.: 88 1st Qu.:23.7
Median : 59904 Median : 2098 Median : 51396 Median : 1181 Median :46.4
Mean : 112396 Mean : 4380 Mean : 99313 Mean : 2357 Mean :46.7
3rd Qu.: 111601 3rd Qu.: 4133 3rd Qu.: 100093 3rd Qu.: 2110 3rd Qu.:69.9
Max. :1120981 Max. :69027 Max. :1068200 Max. :38842 Max. :76.8
cost_spent_on_exposed_group absolute_lift relative_lift probability_of_lift duration
Min. : 90 Min. : -3252 Min. :-0.0373 Min. :0.000 Min. :47.0
1st Qu.: 2207 1st Qu.: 1306 1st Qu.: 0.0898 1st Qu.:1.000 1st Qu.:47.0
Median : 4121 Median : 6967 Median : 0.1603 Median :1.000 Median :67.5
Mean : 19748 Mean : 13083 Mean : 0.4246 Mean :0.949 Mean :67.5
3rd Qu.: 12436 3rd Qu.: 13439 3rd Qu.: 0.3061 3rd Qu.:1.000 3rd Qu.:88.0
Max. :625238 Max. :171270 Max. : 6.2263 Max. :1.000 Max. :88.0
Significant_Spend region_v2 parsed_type grouped_conversion
Min. :0.000 Length:92 Min. : NA Length:92
1st Qu.:1.000 Class :character 1st Qu.: NA Class :character
Median :1.000 Mode :character Median : NA Mode :character
Mean :0.924 Mean :NaN
3rd Qu.:1.000 3rd Qu.: NA
Max. :1.000 Max. : NA
NA's :92
# Chrome only (Search, tactic-level rows): duration vs. absolute lift.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  filter(pa == "Chrome") %>%
  ggplot(aes(x = duration, y = absolute_lift)) +
  geom_point() +
  xlab("Duration") +
  ylab("Lift") +
  ggtitle("Duration vs. Lift")

# Chrome only (Search, tactic-level rows): duration vs. cost spent.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  filter(pa == "Chrome") %>%
  ggplot(aes(x = duration, y = cost_spent_on_exposed_group)) +
  geom_point() +
  xlab("Duration") +
  ylab("Cost") +
  ggtitle("Duration vs. Cost")
##Summary of Findings 1. Duration seems to be either short or long, and doesnt seem to have any apparent effect on cost or lift, with only a slight increase in lift for longer studies
# DataExplorer bar plots of the categorical columns for Search, tactic-level
# rows, laid out three per row.
Final_CLS_2022_Study_List[c("year", "quarter", "region_v2", "pa", "tactic", "Significant_Spend", "channel")] %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  plot_bar(ncol = 3)
#> NA

# The same columns, with each bar broken down by PA.
Final_CLS_2022_Study_List[c("year", "quarter", "region_v2", "pa", "tactic", "Significant_Spend", "channel")] %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  plot_bar(by = "pa")
##Frequency and Percentage for each variable
# SmartEDA categorical plots for Search, tactic-level rows. The `[[n]]`
# markers are what the returned list of plots prints; they are kept as
# output comments and are not part of the calls.
Final_CLS_2022_Study_List[c("year", "quarter", "region_v2", "pa", "tactic", "Significant_Spend", "channel")] %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ExpCatViz()
#> [[1]]
#> [[2]]
#> [[3]]
#> [[4]]
#> [[5]]
#> [[6]]
#> NA

# Significant_Spend against grouped conversion
ExpCatViz(
  Final_CLS_2022_Study_List %>%
    filter(channel == "Search") %>%
    filter(tactic != "All") %>%
    select(grouped_conversion, Significant_Spend),
  target = "grouped_conversion"
)
#> [[1]]

# Significant_Spend against tactic
ExpCatViz(
  Final_CLS_2022_Study_List %>%
    filter(channel == "Search") %>%
    filter(tactic != "All") %>%
    select(tactic, Significant_Spend),
  target = "tactic"
)
#> [[1]]

# Significant_Spend against region
ExpCatViz(
  Final_CLS_2022_Study_List %>%
    filter(channel == "Search") %>%
    filter(tactic != "All") %>%
    select(region_v2, Significant_Spend),
  target = "region_v2"
)
#> [[1]]
# ggstatsplot bar chart of Significant_Spend within each PA (Search,
# tactic-level rows), labelled with both counts and percentages.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggbarstats(
    x = Significant_Spend,
    y = pa,
    label = "both"
  )

# The same chart per region.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search") %>%
  filter(tactic != "All") %>%
  ggbarstats(
    x = Significant_Spend,
    y = region_v2,
    label = "both"
  )

# dlookr descriptive statistics for the same rows, rendered as a flextable.
# The function is namespace-qualified because several attached packages
# define a describe().
dlookr::describe(
  Final_CLS_2022_Study_List %>%
    filter(channel == "Search") %>%
    filter(tactic != "All")
) %>%
  flextable()
#> described_variables | n | na | mean | sd | se_mean | IQR | skewness | kurtosis | p00 | p01 | p05 | p10 | p20 | p25 | p30 | p40 | p50 | p60 | p70 | p75 | p80 | p90 | p95 | p99 | p100 |
study_id | 155 | 0 | 11.65161 | 8.20167 | 0.658774 | 15.00000 | -0.043312 | -1.6566 | 1.000000 | 1.540000 | 2.00000000 | 2.000000 | 2.000000 | 2.000000 | 2.00000 | 5.00000 | 17.00000 | 17.00000 | 17.00000 | 17.0000 | 18.6000 | 22.0000 | 22.0000 | 24.9200 | 26.000 |
year | 155 | 0 | 2,021.53548 | 0.50036 | 0.040190 | 1.00000 | -0.143689 | -2.0054 | 2,021.000000 | 2,021.000000 | 2,021.00000000 | 2,021.000000 | 2,021.000000 | 2,021.000000 | 2,021.00000 | 2,021.00000 | 2,022.00000 | 2,022.00000 | 2,022.00000 | 2,022.0000 | 2,022.0000 | 2,022.0000 | 2,022.0000 | 2,022.0000 | 2,022.000 |
treatment_user_count | 155 | 0 | 457,762.51613 | 1,185,538.27011 | 95,224.725370 | 266,941.00000 | 6.644009 | 54.8222 | 654.000000 | 1,441.560000 | 5,485.80000000 | 11,200.000000 | 38,717.000000 | 56,416.500000 | 71,529.60000 | 99,436.80000 | 125,019.00000 | 161,855.80000 | 247,577.60000 | 323,357.5000 | 434,000.8000 | 1,128,409.8000 | 1,664,630.0000 | 5,230,596.0600 | 11,617,859.000 |
exposed | 155 | 0 | 71,207.11613 | 160,074.19786 | 12,857.468978 | 78,737.00000 | 4.641277 | 24.5423 | 139.000000 | 359.760000 | 714.70000000 | 1,085.800000 | 2,551.800000 | 3,163.000000 | 3,803.00000 | 6,209.20000 | 14,711.00000 | 43,641.20000 | 63,087.40000 | 81,900.0000 | 90,303.2000 | 130,640.2000 | 227,906.6000 | 855,874.0800 | 1,120,981.000 |
control_user_count | 155 | 0 | 44,631.42581 | 143,434.51256 | 11,520.937292 | 29,985.50000 | 6.827639 | 52.3805 | 34.000000 | 54.320000 | 220.70000000 | 440.000000 | 1,259.400000 | 1,637.500000 | 2,105.00000 | 3,067.80000 | 5,540.00000 | 11,461.60000 | 23,418.80000 | 31,623.0000 | 41,277.4000 | 100,496.2000 | 151,440.0000 | 706,246.5800 | 1,296,187.000 |
scaled_control | 155 | 0 | 62,111.68960 | 143,283.81273 | 11,508.832790 | 69,080.83573 | 4.721750 | 25.8190 | 19.235294 | 77.297210 | 434.69925000 | 585.671345 | 1,293.528268 | 1,649.765944 | 2,101.39129 | 3,393.16837 | 11,041.08430 | 31,737.96502 | 56,406.34664 | 70,730.6017 | 85,623.1226 | 113,075.2718 | 206,076.3153 | 763,568.6357 | 1,068,200.366 |
control | 155 | 0 | 1,866.98710 | 4,053.96663 | 325.622436 | 1,673.50000 | 6.008328 | 47.3133 | 1.000000 | 10.700000 | 23.00000000 | 39.200000 | 92.600000 | 152.500000 | 179.20000 | 341.00000 | 652.00000 | 1,204.60000 | 1,556.80000 | 1,826.0000 | 2,109.0000 | 4,879.6000 | 6,711.7000 | 17,010.2600 | 38,842.000 |
scaling_factor | 155 | 0 | 31.27883 | 26.11838 | 2.097879 | 56.76344 | 0.645883 | -1.2445 | 0.996782 | 1.423958 | 2.22880141 | 2.266575 | 11.154693 | 11.219395 | 13.59452 | 19.16022 | 23.43750 | 23.86605 | 27.75251 | 67.9828 | 68.7321 | 70.4112 | 71.1102 | 72.5379 | 76.786 |
cost_spent_on_exposed_group | 155 | 0 | 134,528.59452 | 332,405.51105 | 26,699.453151 | 86,459.31000 | 4.728519 | 27.9018 | 90.000000 | 196.676800 | 1,053.73900000 | 1,463.724000 | 2,670.646000 | 3,350.750000 | 4,106.55400 | 7,512.36200 | 12,760.87000 | 22,625.23000 | 67,448.91600 | 89,810.0600 | 171,164.7320 | 387,824.4760 | 587,876.5770 | 1,513,617.3546 | 2,712,280.150 |
absolute_lift | 155 | 0 | 9,095.42653 | 20,662.61897 | 1,659.661494 | 8,510.23254 | 5.485143 | 35.0432 | -3,252.225705 | -1,709.366144 | 1.48683763 | 120.448297 | 524.176452 | 762.431576 | 1,046.15218 | 2,063.39825 | 3,406.37731 | 5,254.67275 | 8,061.19861 | 9,272.6641 | 10,397.7782 | 17,922.0839 | 27,719.6345 | 119,158.6372 | 171,269.689 |
relative_lift | 155 | 0 | 0.73080 | 1.09322 | 0.087810 | 0.93334 | 3.097510 | 13.6685 | -0.037338 | -0.021587 | 0.00085966 | 0.041007 | 0.070284 | 0.093168 | 0.11107 | 0.16127 | 0.21809 | 0.36987 | 0.80828 | 1.0265 | 1.3333 | 1.9605 | 2.3963 | 5.1451 | 7.632 |
probability_of_lift | 155 | 0 | 0.94617 | 0.19143 | 0.015376 | 0.00000 | -3.886648 | 14.9914 | 0.000000 | 0.000486 | 0.50729000 | 0.999840 | 1.000000 | 1.000000 | 1.00000 | 1.00000 | 1.00000 | 1.00000 | 1.00000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.000 |
duration | 155 | 0 | 72.17419 | 26.19536 | 2.104062 | 41.00000 | 2.279661 | 11.1550 | 45.000000 | 46.080000 | 47.00000000 | 47.000000 | 47.000000 | 47.000000 | 47.00000 | 63.00000 | 67.00000 | 88.00000 | 88.00000 | 88.0000 | 88.0000 | 96.0000 | 103.0000 | 156.8200 | 220.000 |
Significant_Spend | 155 | 0 | 0.90968 | 0.28757 | 0.023098 | 0.00000 | -2.886456 | 6.4142 | 0.000000 | 0.000000 | 0.00000000 | 1.000000 | 1.000000 | 1.000000 | 1.00000 | 1.00000 | 1.00000 | 1.00000 | 1.00000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.000 |
parsed_type | 12 | 143 | 4.00000 | 0.00000 | 0.000000 | 0.00000 | 4.000000 | 4.000000 | 4.00000000 | 4.000000 | 4.000000 | 4.000000 | 4.00000 | 4.00000 | 4.00000 | 4.00000 | 4.00000 | 4.0000 | 4.0000 | 4.0000 | 4.0000 | 4.0000 | 4.000 |
# Univariate numeric summary of the study metrics, grouped by `pa`, for rows
# with channel "Search" and a tactic other than "All"; rendered via
# knitr::kable().
# NOTE(review): the text fused after `knitr::kable()` on the last line looks
# like the rendered console message of this chunk rather than code - confirm
# when re-knitting.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
group_by(pa) %>%
select(treatment_user_count, exposed, control_user_count, scaled_control, control, scaling_factor,
cost_spent_on_exposed_group, absolute_lift, relative_lift, duration, pa) %>%
univar_numeric() %>%
knitr::kable()Adding missing grouping variables: `pa`
|
# Numeric diagnostics (min/quartiles/mean/max plus zero, minus and outlier
# counts, per the table header that follows) for the same Search,
# non-"All"-tactic rows, grouped by `pa` and rendered as a flextable.
# NOTE(review): the repeated "Adding missing grouping variables" text fused
# onto the last line appears to be rendered console output, not code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
group_by(pa) %>%
select(treatment_user_count, exposed, control_user_count, scaled_control, control, scaling_factor,
cost_spent_on_exposed_group, absolute_lift, relative_lift, duration, pa) %>%
diagnose_numeric() %>%
flextable()Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`Adding missing grouping variables: `pa`
variables | min | Q1 | mean | median | Q3 | max | zero | minus | outlier |
treatment_user_count | 654.000000 | 29,665.7500000 | 177,532.271739 | 102,492.000000 | 185,546.250000 | 1,645,490.00000 | 0 | 0 | 8 |
treatment_user_count | 24,329.000000 | 46,842.0000000 | 71,247.750000 | 59,908.000000 | 80,322.250000 | 149,889.00000 | 0 | 0 | 2 |
treatment_user_count | 40,312.000000 | 74,012.0000000 | 321,009.000000 | 260,403.000000 | 345,992.000000 | 1,285,238.00000 | 0 | 0 | 1 |
treatment_user_count | 37,945.000000 | 343,664.0000000 | 1,572,608.387097 | 931,367.000000 | 1,611,873.000000 | 11,617,859.00000 | 0 | 0 | 3 |
treatment_user_count | 15,801.000000 | 53,611.5000000 | 120,181.571429 | 110,255.000000 | 125,698.000000 | 356,596.00000 | 0 | 0 | 1 |
exposed | 139.000000 | 5,505.2500000 | 112,395.608696 | 59,904.000000 | 111,600.750000 | 1,120,981.00000 | 0 | 0 | 8 |
exposed | 513.000000 | 1,097.0000000 | 3,176.583333 | 2,219.500000 | 5,176.000000 | 8,695.00000 | 0 | 0 | 0 |
exposed | 2,104.000000 | 2,853.0000000 | 26,190.461538 | 13,903.000000 | 23,044.000000 | 111,394.00000 | 0 | 0 | 2 |
exposed | 884.000000 | 3,595.5000000 | 9,704.161290 | 6,298.000000 | 8,873.500000 | 51,998.00000 | 0 | 0 | 5 |
exposed | 255.000000 | 656.5000000 | 2,469.000000 | 1,535.000000 | 4,487.000000 | 5,206.00000 | 0 | 0 | 0 |
control_user_count | 34.000000 | 768.7500000 | 4,380.141304 | 2,098.500000 | 4,132.750000 | 69,027.00000 | 0 | 0 | 10 |
control_user_count | 10,771.000000 | 20,803.0000000 | 31,645.666667 | 26,420.500000 | 35,799.250000 | 66,860.00000 | 0 | 0 | 2 |
control_user_count | 5,540.000000 | 11,683.0000000 | 51,433.769231 | 31,039.000000 | 60,248.000000 | 175,595.00000 | 0 | 0 | 2 |
control_user_count | 2,997.000000 | 27,384.0000000 | 164,274.741935 | 63,524.000000 | 131,202.500000 | 1,296,187.00000 | 0 | 0 | 5 |
control_user_count | 5,650.000000 | 26,723.5000000 | 53,427.714286 | 37,802.000000 | 74,072.500000 | 128,950.00000 | 0 | 0 | 0 |
scaled_control | 19.235294 | 3,817.1348773 | 99,312.808764 | 51,396.465265 | 100,093.272447 | 1,068,200.36600 | 0 | 0 | 8 |
scaled_control | 59.430208 | 446.9747827 | 1,517.284022 | 981.720227 | 2,535.916876 | 4,050.99346 | 0 | 0 | 0 |
scaled_control | 1,931.504020 | 2,805.6606210 | 25,104.755448 | 12,938.178380 | 22,679.975710 | 108,284.86830 | 0 | 0 | 2 |
scaled_control | 519.100767 | 1,724.3705375 | 4,229.900950 | 2,257.894937 | 3,925.435878 | 21,488.94725 | 0 | 0 | 4 |
scaled_control | 92.517249 | 557.9569654 | 2,119.617532 | 1,138.231327 | 3,978.150134 | 4,534.35995 | 0 | 0 | 0 |
control | 1.000000 | 88.0000000 | 2,356.782609 | 1,181.000000 | 2,109.500000 | 38,842.00000 | 0 | 0 | 9 |
control | 26.000000 | 199.5000000 | 673.166667 | 439.000000 | 1,120.250000 | 1,807.00000 | 0 | 0 | 0 |
control | 288.000000 | 669.0000000 | 3,038.000000 | 1,300.000000 | 5,358.000000 | 12,807.00000 | 0 | 0 | 1 |
control | 41.000000 | 135.0000000 | 549.870968 | 180.000000 | 315.000000 | 5,518.00000 | 0 | 0 | 5 |
control | 91.000000 | 176.5000000 | 1,134.428571 | 407.000000 | 1,270.500000 | 4,549.00000 | 0 | 0 | 1 |
scaling_factor | 19.235294 | 23.7128804 | 46.723841 | 46.361287 | 69.924483 | 76.78602 | 0 | 0 | 0 |
scaling_factor | 2.214318 | 2.2403883 | 2.253276 | 2.256750 | 2.266488 | 2.28578 | 0 | 0 | 0 |
scaling_factor | 1.770905 | 3.4504836 | 9.893576 | 6.801603 | 18.773382 | 19.28194 | 0 | 0 | 0 |
scaling_factor | 1.937529 | 11.1659266 | 12.148111 | 12.511355 | 14.590574 | 14.69924 | 0 | 0 | 2 |
scaling_factor | 0.996782 | 1.9066551 | 2.482502 | 2.863225 | 3.152607 | 3.39898 | 0 | 0 | 0 |
cost_spent_on_exposed_group | 90.000000 | 2,207.0325000 | 19,748.198478 | 4,120.810000 | 12,436.412500 | 625,237.86000 | 0 | 0 | 8 |
cost_spent_on_exposed_group | 7,050.620000 | 11,406.7275000 | 33,131.326667 | 23,577.095000 | 51,539.115000 | 80,842.52000 | 0 | 0 | 0 |
cost_spent_on_exposed_group | 6,318.120000 | 53,155.5600000 | 396,955.398462 | 196,424.690000 | 357,801.850000 | 2,712,280.15000 | 0 | 0 | 1 |
cost_spent_on_exposed_group | 28,022.860000 | 126,877.2300000 | 407,884.644194 | 301,800.180000 | 443,690.595000 | 1,536,618.12000 | 0 | 0 | 4 |
cost_spent_on_exposed_group | 3,196.860000 | 23,770.7150000 | 118,953.974286 | 54,964.620000 | 71,298.345000 | 584,378.22000 | 0 | 0 | 1 |
absolute_lift | -3,252.225705 | 1,305.7008445 | 13,082.799934 | 6,967.413760 | 13,438.808308 | 171,269.68860 | 0 | 5 | 7 |
absolute_lift | 363.203392 | 548.2542870 | 1,659.299312 | 1,302.962516 | 2,243.657452 | 4,644.00654 | 0 | 0 | 0 |
absolute_lift | -1,286.868292 | 47.3393795 | 1,085.706091 | 430.949458 | 1,059.602792 | 9,336.86470 | 0 | 1 | 1 |
absolute_lift | 202.241667 | 2,366.7577395 | 5,474.260340 | 3,233.980781 | 5,712.137336 | 30,509.05275 | 0 | 0 | 3 |
absolute_lift | -26.584701 | 74.5324922 | 349.382468 | 210.503836 | 534.204363 | 1,044.28443 | 0 | 2 | 0 |
relative_lift | -0.037338 | 0.0898097 | 0.424635 | 0.160341 | 0.306139 | 6.22630 | 0 | 5 | 17 |
relative_lift | 0.489627 | 0.8534501 | 1.982900 | 1.119202 | 1.952710 | 7.63197 | 0 | 0 | 1 |
relative_lift | -0.011884 | 0.0071755 | 0.052035 | 0.062282 | 0.091487 | 0.13228 | 0 | 1 | 0 |
relative_lift | 0.270826 | 0.8375500 | 1.509788 | 1.664331 | 2.016623 | 3.21847 | 0 | 0 | 0 |
relative_lift | -0.022054 | 0.0710344 | 0.418940 | 0.292950 | 0.381686 | 1.75624 | 0 | 2 | 1 |
duration | 47.000000 | 47.0000000 | 67.500000 | 67.500000 | 88.000000 | 88.00000 | 0 | 0 | 0 |
duration | 103.000000 | 103.0000000 | 103.000000 | 103.000000 | 103.000000 | 103.00000 | 0 | 0 | 0 |
duration | 45.000000 | 65.0000000 | 77.538462 | 84.000000 | 96.000000 | 96.00000 | 0 | 0 | 0 |
duration | 55.000000 | 55.0000000 | 62.096774 | 63.000000 | 63.000000 | 77.00000 | 0 | 0 | 2 |
duration | 67.000000 | 72.5000000 | 115.428571 | 78.000000 | 149.000000 | 220.00000 | 0 | 0 | 0 |
## Summary of Findings

1. Pixel PA had the lowest average absolute lift but the highest average duration.
# Distribution checks for the key numeric metrics. Every chunk below uses
# Search rows whose tactic is not "All". The three statements were fused onto
# shared lines and did not parse; they are separated here.

# Histograms, two panels per row.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(absolute_lift, cost_spent_on_exposed_group, duration, relative_lift) %>%
  plot_histogram(ncol = 2)

# QQ plots for the same four metrics.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(absolute_lift, cost_spent_on_exposed_group, relative_lift, duration) %>%
  plot_qq()

# QQ plots split by product area (`pa`).
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, relative_lift, duration,
    pa, grouped_conversion, region_v2
  ) %>%
  plot_qq(by = "pa")

## Diagnosis plots let you see how distributions would change with log and sqrt transformation in case normality isn't met
# Normality diagnostics per product area for Search rows whose tactic is not
# "All". The statements were fused onto shared lines and did not parse; they
# are separated here.

# Absolute lift, by pa.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  group_by(pa) %>%
  plot_normality(absolute_lift)

# Cost spent on the exposed group, by pa.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  group_by(pa) %>%
  plot_normality(cost_spent_on_exposed_group)

# Boxplots of the numeric metrics split by pa.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, relative_lift,
    pa, region_v2, grouped_conversion
  ) %>%
  plot_boxplot(by = "pa")

#install.packages("PMCMRplus")
# Non-parametric (type = "np") between-group comparison of absolute lift
# across product areas, for Search rows whose tactic is not "All".
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, pa, region_v2,
    grouped_conversion
  ) %>%
  ggbetweenstats(x = pa, y = absolute_lift, type = "np")

# Findings:
# 1. All differences in distributions of absolute lift between pa’s are significant
# NOTE(review): the BH-adjusted pairwise Wilcoxon output near the end of this
# file reports p = 0.147 for Chrome vs Google Cloud and p = 0.393 for DSM vs
# Pixel, so "all" may need to be "most" - confirm against the plot's pairwise
# annotations.
# Follow-up group comparisons and cost/lift correlation for Search rows whose
# tactic is not "All". The original statements were fused onto shared lines
# and did not parse; they are separated here.

# Absolute lift by pa, with Chrome removed.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa != "Chrome") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, pa, region_v2,
    grouped_conversion
  ) %>%
  ggbetweenstats(x = pa, y = absolute_lift, type = "np")

# Absolute lift by pa, with Chrome and Google Cloud removed.
Final_CLS_2022_Study_List %>%
  filter(
    channel == "Search",
    tactic != "All",
    !pa %in% c("Chrome", "Google Cloud")
  ) %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, pa, region_v2,
    grouped_conversion
  ) %>%
  ggbetweenstats(x = pa, y = absolute_lift, type = "np")

# Absolute lift by region.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, pa, region_v2,
    grouped_conversion
  ) %>%
  ggbetweenstats(x = region_v2, y = absolute_lift, type = "np")

# Cost spent on the exposed group by pa.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, pa, region_v2,
    grouped_conversion
  ) %>%
  ggbetweenstats(x = pa, y = cost_spent_on_exposed_group, type = "np")

# Cost spent on the exposed group by pa, with DSM removed.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa != "DSM") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, pa, region_v2,
    grouped_conversion
  ) %>%
  ggbetweenstats(x = pa, y = cost_spent_on_exposed_group, type = "np")

# Correlation between cost and absolute lift over all remaining rows.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  correlate(cost_spent_on_exposed_group, absolute_lift)

#' Print the cost / absolute-lift correlation for a single product area.
#'
#' Reads the global `Final_CLS_2022_Study_List`, keeps Search rows whose
#' tactic is not "All" and whose `pa` equals `pa_val`, and prints the result
#' of `correlate()` on the two columns.
#'
#' @param pa_val Value of the `pa` column to keep.
#' @return The value of `print()` on the correlation result.
correlate_func <- function(pa_val) {
  # The result is a correlation summary, not a plot, so it is named as such.
  pa_corr <- Final_CLS_2022_Study_List %>%
    filter(channel == "Search", tactic != "All", pa == pa_val) %>%
    correlate(cost_spent_on_exposed_group, absolute_lift)
  print(pa_corr)
}
# Product areas to iterate over; also reused by later per-PA loops in this
# file.
pa_vals <- c("Chrome", "Chromebook", "DSM", "Pixel", "Google Cloud")

# Print the cost / absolute-lift correlation for each product area.
for (i in pa_vals) {
  correlate_func(i)
}

# Findings
# 1. All pa’s except Pixel have correlation coefficients >0.5 for absolute lift and cost spent
# 2. Chromebook and DSM have over .90 correlation between lift and cost
# 3. Pixel data shows negative correlation between lift and cost - only has 7 datapoints
# ***
## TSNE graph by grouped conversion
# t-SNE embedding of the Search studies (tactic other than "All").
#
# Rtsne() is stochastic. The original fitted it twice without a seed, once
# per plot, so the two scatter plots showed different random embeddings and
# neither could be reproduced. The embedding is now fitted once with a fixed
# seed and the same coordinates are coloured two ways.

# Working copy with a row ID used to join the embedding back to metadata.
Final_CLS_2022_Study_List_2 <- Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(-c(study_id, year, quarter)) %>%
  mutate(ID = row_number())

# Categorical columns, kept aside for colouring the plots.
Final_CLS_2022_Study_List_meta <- Final_CLS_2022_Study_List_2 %>%
  select(ID, pa, tactic, grouped_conversion, region_v2)

# Alternative (wider) feature set, kept commented out as in the original:
# tSNE_fit <- Final_CLS_2022_Study_List_2 %>%
#   select(ID, treatment_user_count, exposed, control_user_count, control,
#          scaling_factor, cost_spent_on_exposed_group, absolute_lift,
#          relative_lift, Significant_Spend) %>%
#   column_to_rownames("ID") %>%
#   scale() %>%
#   Rtsne()

# Select only numerical columns, standardise them and fit the embedding.
set.seed(42) # fixed seed so the embedding is reproducible across knits
tSNE_fit <- Final_CLS_2022_Study_List_2 %>%
  select(
    ID, treatment_user_count, exposed, cost_spent_on_exposed_group,
    absolute_lift, relative_lift, Significant_Spend
  ) %>%
  column_to_rownames("ID") %>%
  scale() %>%
  Rtsne()

# Turn the coordinates into a data frame and join the categorical columns.
# ID is rebuilt from the row position, so this relies on Rtsne() returning
# rows in input order.
tSNE_df <- tSNE_fit$Y %>%
  as.data.frame() %>%
  rename(tSNE1 = "V1", tSNE2 = "V2") %>%
  mutate(ID = row_number()) %>%
  inner_join(Final_CLS_2022_Study_List_meta, by = "ID")

tSNE_df %>% head()

# Embedding coloured by grouped conversion.
tSNE_df %>%
  ggplot(aes(x = tSNE1, y = tSNE2, color = grouped_conversion)) +
  geom_point() +
  theme(legend.position = "bottom")

# Same embedding coloured by product area.
tSNE_df %>%
  ggplot(aes(x = tSNE1, y = tSNE2, color = pa)) +
  geom_point() +
  theme(legend.position = "bottom")

## Stats table of features with and without Outliers - calculated using IQR method
# Outlier diagnostics for every column that diagnose_outlier() reports on,
# using Search rows whose tactic is not "All"; rendered as a flextable.
# NOTE(review): the "variables | outliers_cnt | ..." text fused after
# `flextable()` is the header row of the rendered table that follows, not
# code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
diagnose_outlier() %>%
flextable()variables | outliers_cnt | outliers_ratio | outliers_mean | with_mean | without_mean |
study_id | 0 | 0.0000 | 11.65161 | 11.65161 | |
year | 0 | 0.0000 | 2,021.53548 | 2,021.53548 | |
treatment_user_count | 23 | 14.8387 | 2,237,545.47826 | 457,762.51613 | 147,648.81818 |
exposed | 8 | 5.1613 | 665,873.00000 | 71,207.11613 | 38,844.34694 |
control_user_count | 18 | 11.6129 | 284,461.77778 | 44,631.42581 | 13,120.86861 |
scaled_control | 9 | 5.8065 | 544,691.29931 | 62,111.68960 | 32,363.63146 |
control | 18 | 11.6129 | 9,691.05556 | 1,866.98710 | 839.00730 |
scaling_factor | 0 | 0.0000 | 31.27883 | 31.27883 | |
cost_spent_on_exposed_group | 28 | 18.0645 | 625,606.29893 | 134,528.59452 | 26,259.49433 |
absolute_lift | 13 | 8.3871 | 58,042.93293 | 9,095.42653 | 4,614.31679 |
relative_lift | 8 | 5.1613 | 4.09190 | 0.73080 | 0.54788 |
probability_of_lift | 17 | 10.9677 | 0.50921 | 0.94617 | 1.00000 |
duration | 2 | 1.2903 | 220.00000 | 72.17419 | 70.24183 |
Significant_Spend | 14 | 9.0323 | 0.00000 | 0.90968 | 1.00000 |
parsed_type | 0 | 0.0000 | 4.00000 | 4.00000 |
## Plotting Variables with and without Outliers
# Outlier plots for absolute lift and cost, using Search rows whose tactic is
# not "All". The three statements were fused onto shared lines and did not
# parse; they are separated here.

# All product areas.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  dplyr::select(absolute_lift, cost_spent_on_exposed_group) %>%
  plot_outlier()

# Every product area except Chrome.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa != "Chrome") %>%
  dplyr::select(absolute_lift, cost_spent_on_exposed_group) %>%
  plot_outlier()

#Plotting variables with and without outliers - Chrome data
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa == "Chrome") %>%
  dplyr::select(absolute_lift, cost_spent_on_exposed_group) %>%
  plot_outlier()

# Chrome seems to be outperforming all other pa’s
# Look-ups supporting the Chrome observation. The statements were fused onto
# shared lines and did not parse; they are separated here.

# Print the Search rows whose tactic is not "All".
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All")

# Study and pa with the largest absolute lift.
# NOTE(review): this and the next look-up keep tactic == "All", whereas every
# other chunk in this section excludes it - confirm this is intentional.
# The max() filter stays a separate step so the maximum is taken over the
# already-filtered rows.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic == "All") %>%
  filter(absolute_lift == max(absolute_lift)) %>%
  select(study_id, pa)

# Study and pa with the largest cost spent on the exposed group.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic == "All") %>%
  filter(cost_spent_on_exposed_group == max(cost_spent_on_exposed_group)) %>%
  select(study_id, pa)

# Chrome only: boxplots of the numeric metrics split by region.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa == "Chrome") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, relative_lift,
    pa, region_v2, grouped_conversion
  ) %>%
  plot_boxplot(by = "region_v2")

# Since Chrome only contains Desktop Downloads, this could be contributing to why it has such high lift
# Chrome drill-down, using Search rows whose tactic is not "All". The
# statements were fused onto shared lines and did not parse; they are
# separated here.

# Boxplots of the numeric metrics split by grouped conversion.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa == "Chrome") %>%
  select(
    absolute_lift, cost_spent_on_exposed_group, relative_lift,
    pa, region_v2, grouped_conversion
  ) %>%
  plot_boxplot(by = "grouped_conversion")

#seeing what regions are in Chrome
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa == "Chrome") %>%
  ggplot(aes(
    x = cost_spent_on_exposed_group,
    y = absolute_lift,
    color = region_v2
  )) +
  geom_point() +
  xlab("Cost") +
  ylab("Lift") +
  ggtitle("Cost vs. Lift")

# Number of Chrome studies per region.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa == "Chrome") %>%
  group_by(region_v2) %>%
  summarise(count_regions = n())

# Number of Chrome studies per Significant_Spend value.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All", pa == "Chrome") %>%
  group_by(Significant_Spend) %>%
  summarise(total_count = n())

#' Print a cost-vs-absolute-lift scatter plot with statistics for one
#' product area.
#'
#' Reads the global `Final_CLS_2022_Study_List`, keeps Search rows whose
#' tactic is not "All" and whose `pa` equals `pa_val`, and prints the
#' `ggscatterstats()` plot built with `type = "np"`.
#'
#' @param pa_val Value of the `pa` column to keep.
#' @return The value of `print()` on the plot object.
scatter_stats_pa <- function(pa_val) {
  # Named `pa_plot` rather than `plot` to avoid shadowing base::plot.
  pa_plot <- Final_CLS_2022_Study_List %>%
    filter(channel == "Search", tactic != "All", pa == pa_val) %>%
    ggscatterstats(
      x = cost_spent_on_exposed_group,
      y = absolute_lift,
      type = "np"
    )
  print(pa_plot)
}
# Draw the cost-vs-lift scatter plot for every product area listed in
# `pa_vals` (defined earlier in this file).
# NOTE(review): the "Registered S3 method ..." text fused after the closing
# brace looks like a rendered console message, not code.
for (i in pa_vals) {
scatter_stats_pa(i)
}Registered S3 method overwritten by 'ggside':
method from
+.gg GGally
#' Print a cost-vs-absolute-lift scatter plot with statistics for one region.
#'
#' Uses the global `Final_CLS_2022_Study_List`, restricted to Search rows
#' whose tactic is not "All" and whose `region_v2` equals `region_val`.
#'
#' @param region_val Value of the `region_v2` column to keep.
#' @return The value of `print()` on the plot object.
scatter_stats_region <- function(region_val) {
  region_studies <- filter(
    Final_CLS_2022_Study_List,
    channel == "Search",
    tactic != "All",
    region_v2 == region_val
  )
  region_plot <- ggscatterstats(
    region_studies,
    x = cost_spent_on_exposed_group,
    y = absolute_lift,
    type = "np"
  )
  print(region_plot)
}

# One plot per entry of `region_cols` (not defined in this part of the file).
for (i in region_cols) {
  scatter_stats_region(i)
}

#Evaluating stats with and without Outliers for all PA’s
# Outlier diagnostics for the Chrome product area: Search rows whose tactic
# is not "All", limited to the user-count, cost and absolute-lift columns.
# NOTE(review): the text fused after `flextable()` is the header row of the
# rendered table that follows, not code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
filter(pa == "Chrome") %>%
select(treatment_user_count, exposed, control_user_count, cost_spent_on_exposed_group, absolute_lift) %>%
diagnose_outlier() %>%
flextable()variables | outliers_cnt | outliers_ratio | outliers_mean | with_mean | without_mean |
treatment_user_count | 8 | 8.6957 | 958,318 | 177,532.3 | 103,171.7 |
exposed | 8 | 8.6957 | 665,873 | 112,395.6 | 59,683.5 |
control_user_count | 10 | 10.8696 | 21,901 | 4,380.1 | 2,243.5 |
cost_spent_on_exposed_group | 8 | 8.6957 | 162,164 | 19,748.2 | 6,184.8 |
absolute_lift | 7 | 7.6087 | 85,733 | 13,082.8 | 7,099.8 |
# Outlier diagnostics for the Chromebook product area: Search rows whose
# tactic is not "All", limited to the user-count, cost and absolute-lift
# columns.
# NOTE(review): the text fused after `flextable()` is the header row of the
# rendered table that follows, not code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
filter(pa == "Chromebook") %>%
select(treatment_user_count, exposed, control_user_count, cost_spent_on_exposed_group, absolute_lift) %>%
diagnose_outlier() %>%
flextable()variables | outliers_cnt | outliers_ratio | outliers_mean | with_mean | without_mean |
treatment_user_count | 2 | 16.667 | 145,274 | 71,247.8 | 56,442.6 |
exposed | 0 | 0.000 | 3,176.6 | 3,176.6 | |
control_user_count | 2 | 16.667 | 64,622 | 31,645.7 | 25,050.5 |
cost_spent_on_exposed_group | 0 | 0.000 | 33,131.3 | 33,131.3 | |
absolute_lift | 0 | 0.000 | 1,659.3 | 1,659.3 |
# Outlier diagnostics for the Pixel product area: Search rows whose tactic is
# not "All", limited to the user-count, cost and absolute-lift columns.
# NOTE(review): the text fused after `flextable()` is the header row of the
# rendered table that follows, not code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
filter(pa == "Pixel") %>%
select(treatment_user_count, exposed, control_user_count, cost_spent_on_exposed_group, absolute_lift) %>%
diagnose_outlier() %>%
flextable()variables | outliers_cnt | outliers_ratio | outliers_mean | with_mean | without_mean |
treatment_user_count | 1 | 14.286 | 356,596 | 120,181.57 | 80,779.17 |
exposed | 0 | 0.000 | 2,469.00 | 2,469.00 | |
control_user_count | 0 | 0.000 | 53,427.71 | 53,427.71 | |
cost_spent_on_exposed_group | 1 | 14.286 | 584,378 | 118,953.97 | 41,383.27 |
absolute_lift | 0 | 0.000 | 349.38 | 349.38 |
# Outlier diagnostics filtered to pa == "Chrome", limited to the user-count,
# cost and absolute-lift columns.
# NOTE(review): this chunk repeats the Chrome filter already used by the
# first per-PA outlier chunk in this section, and the rendered table below
# matches that earlier Chrome table. `pa_vals` also lists "DSM", which has no
# chunk here - confirm whether this filter was meant to be pa == "DSM".
# NOTE(review): the text fused after `flextable()` is the header row of the
# rendered table that follows, not code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
filter(pa == "Chrome") %>%
select(treatment_user_count, exposed, control_user_count, cost_spent_on_exposed_group, absolute_lift) %>%
diagnose_outlier() %>%
flextable()variables | outliers_cnt | outliers_ratio | outliers_mean | with_mean | without_mean |
treatment_user_count | 8 | 8.6957 | 958,318 | 177,532.3 | 103,171.7 |
exposed | 8 | 8.6957 | 665,873 | 112,395.6 | 59,683.5 |
control_user_count | 10 | 10.8696 | 21,901 | 4,380.1 | 2,243.5 |
cost_spent_on_exposed_group | 8 | 8.6957 | 162,164 | 19,748.2 | 6,184.8 |
absolute_lift | 7 | 7.6087 | 85,733 | 13,082.8 | 7,099.8 |
# Outlier diagnostics for the Google Cloud product area: Search rows whose
# tactic is not "All", limited to the user-count, cost and absolute-lift
# columns.
# NOTE(review): the text fused after `flextable()` is the header row of the
# rendered table that follows, not code.
Final_CLS_2022_Study_List %>%
filter(channel == "Search") %>%
filter(tactic != "All") %>%
filter(pa == "Google Cloud") %>%
select(treatment_user_count, exposed, control_user_count, cost_spent_on_exposed_group, absolute_lift) %>%
diagnose_outlier() %>%
flextable()variables | outliers_cnt | outliers_ratio | outliers_mean | with_mean | without_mean |
treatment_user_count | 3 | 9.6774 | 7,406,089 | 1,572,608.4 | 947,592.6 |
exposed | 5 | 16.1290 | 31,169 | 9,704.2 | 5,576.3 |
control_user_count | 5 | 16.1290 | 690,457 | 164,274.7 | 63,085.8 |
cost_spent_on_exposed_group | 4 | 12.9032 | 1,422,370 | 407,884.6 | 257,590.5 |
absolute_lift | 3 | 9.6774 | 22,877 | 5,474.3 | 3,609.7 |
#install.packages("gtsummary")

# Summary table of categorical and numeric study attributes by product area,
# with p-values, for Search rows whose tactic is not "All".
# The pipeline head was fused onto the `#install.packages(...)` comment line
# above, which commented out the data frame; it is restored on its own line.
# NOTE(review): the rendered log below shows add_p() failing for the
# categorical columns. `channel` has a single level after filtering, and the
# Fisher tests run out of workspace. Consider dropping `channel` and passing
# Fisher arguments through add_p()'s `test.args` (e.g.
# simulate.p.value = TRUE) - verify against the gtsummary documentation.
Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All") %>%
  select(
    region_v2, pa, channel, tactic, grouped_conversion,
    cost_spent_on_exposed_group, absolute_lift
  ) %>%
  tbl_summary(by = pa) %>%
  add_p()
There was an error in 'add_p()/add_difference()' for variable 'region_v2', p-value omitted:
Error in stats::fisher.test(c("AMER_USCA", "AMER_USCA", "AMER", "AMER", : FEXACT error 6 (f5xact). LDKEY=621 is too small for this problem: kval=14082182.
Try increasing the size of the workspace.
There was an error in 'add_p()/add_difference()' for variable 'channel', p-value omitted:
Error in stats::chisq.test(x = c("Search", "Search", "Search", "Search", : 'x' and 'y' must have at least 2 levels
There was an error in 'add_p()/add_difference()' for variable 'tactic', p-value omitted:
Error in stats::fisher.test(c("AO", "SKWS", "AO", "AO", "AO", "AO", "AO", : FEXACT error 7(location). LDSTP=18630 is too small for this problem,
(pastp=23.8233, ipn_0:=ipoin[itp=484]=5476, stp[ipn_0]=14.5866).
Increase workspace or consider using 'simulate.p.value=TRUE'
There was an error in 'add_p()/add_difference()' for variable 'grouped_conversion', p-value omitted:
Error in stats::fisher.test(c("Mobile Conversions", "Mobile Conversions", : FEXACT error 6. LDKEY=621 is too small for this problem,
(ii := key2[itp=602] = 4220911, ldstp=18630)
Try increasing the size of the workspace and possibly 'mult'
| Characteristic | Chrome, N = 921 | Chromebook, N = 121 | DSM, N = 131 | Google Cloud, N = 311 | Pixel, N = 71 | p-value2 |
|---|---|---|---|---|---|---|
| region_v2 | ||||||
| AMER | 20 (22%) | 0 (0%) | 4 (31%) | 14 (45%) | 0 (0%) | |
| AMER_USCA | 7 (7.6%) | 2 (17%) | 9 (69%) | 3 (9.7%) | 4 (57%) | |
| AO | 5 (5.4%) | 2 (17%) | 0 (0%) | 0 (0%) | 0 (0%) | |
| APAC | 35 (38%) | 2 (17%) | 0 (0%) | 12 (39%) | 0 (0%) | |
| EMEA | 25 (27%) | 6 (50%) | 0 (0%) | 2 (6.5%) | 3 (43%) | |
| channel | ||||||
| Search | 92 (100%) | 12 (100%) | 13 (100%) | 31 (100%) | 7 (100%) | |
| tactic | ||||||
| AO | 23 (25%) | 0 (0%) | 11 (85%) | 10 (32%) | 5 (71%) | |
| BKWS | 32 (35%) | 6 (50%) | 2 (15%) | 11 (35%) | 1 (14%) | |
| SKWS | 37 (40%) | 6 (50%) | 0 (0%) | 10 (32%) | 1 (14%) | |
| grouped_conversion | ||||||
| Chromebook Referrals | 0 (0%) | 12 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | |
| Desktop Downloads | 92 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | |
| GWS MCC - Free Trial Submit. | 0 (0%) | 0 (0%) | 0 (0%) | 31 (100%) | 0 (0%) | |
| Mobile Conversions | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 7 (100%) | |
| Non-Mobile Device Conversions | 0 (0%) | 0 (0%) | 13 (100%) | 0 (0%) | 0 (0%) | |
| cost_spent_on_exposed_group | 4,121 (2,207, 12,436) | 23,577 (11,407, 51,539) | 196,425 (53,156, 357,802) | 301,800 (126,877, 443,691) | 54,965 (23,771, 71,298) | <0.001 |
| absolute_lift | 6,967 (1,306, 13,439) | 1,303 (548, 2,244) | 431 (47, 1,060) | 3,234 (2,367, 5,712) | 211 (75, 534) | <0.001 |
| 1 n (%); Median (IQR) | ||||||
| 2 Kruskal-Wallis rank sum test | ||||||
# Search studies at tactic level (tactic other than "All"); reused by the
# rank-based tests that follow.
Search_no_tactic <- Final_CLS_2022_Study_List %>%
  filter(channel == "Search", tactic != "All")

# Kruskal-Wallis test of absolute lift across product areas.
# The earlier attempt piped the data frame straight into kruskal.test(). That
# made the data frame the `x` argument and the formula the ignored `g`
# argument (hence the "'x' is a list, so ignoring argument 'g'" warning), and
# it stored the meaningless test result under `Search_no_tactic`. The formula
# interface needs the data passed through `data =`.
kruskal.test(absolute_lift ~ pa, data = Search_no_tactic)
Kruskal-Wallis rank sum test
data: absolute_lift by pa
Kruskal-Wallis chi-squared = 33.3, df = 4, p-value = 0.000001
# Kruskal-Wallis test of absolute lift across regions, on the filtered
# `Search_no_tactic` data frame built above.
kruskal.test(absolute_lift ~ region_v2, data = Search_no_tactic)
Kruskal-Wallis rank sum test
data: absolute_lift by region_v2
Kruskal-Wallis chi-squared = 18.4, df = 4, p-value = 0.001
# Pairwise Wilcoxon rank-sum comparisons of absolute lift between product
# areas, with Benjamini-Hochberg ("BH") adjustment of the p-values.
pairwise.wilcox.test(Search_no_tactic$absolute_lift, Search_no_tactic$pa,
p.adjust.method = "BH")
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: Search_no_tactic$absolute_lift and Search_no_tactic$pa
Chrome Chromebook DSM Google Cloud
Chromebook 0.009 - - -
DSM 0.0009 0.032 - -
Google Cloud 0.147 0.004 0.0002 -
Pixel 0.002 0.009 0.393 0.0001
P value adjustment method: BH